---
title: "JCL Replication"
author: "Zach Wallander, Sara Benesh and David Armstrong"
date: "21/01/2019"
output: html_document
---

```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE, message=FALSE, warning=FALSE)
library(Zelig)
library(MASS)
library(BayesTree)
library(birdring)
library(dplyr)
library(lmtest)
library(car)
library(ggplot2)
effects <- coefs <-  vector(mode="list", length=3)
```

This document reproduces the analysis in "Advisors to Elites: Untangling Their Effects" by Wallander, Benesh and Armstrong.  

## Appendix 

First, we replicate the analysis in the appendix that considers a couple of different measures for cert-worthiness. We load the data, then estimate the Bayesian Additive Regression Tree to generate the first measure of cert-worthiness.  From the raw data, we removed the deadlisted cases. 

```{r}
setwd("~/Dropbox/Zach/JLC_replication")
load("cert_data.rda")
meanSD <- function(x, digits=3){
   out <- c(mean(x, na.rm=T), sd(x, na.rm=T))
   out <- round(out, digits)
   names(out) <- c("Mean", "SD")
   out
}
shuffle <- function(x,y){
   res <- matrix(c(rbind(x,y)), ncol=1)
   colnames(res) <- "B (HPD)"
   rownames(res)[seq(2, nrow(res), by=2)] <- ""
   rownames(res)[seq(1, nrow(res), by=2)] <- names(x)
   noquote(res)
}
mci <- function(x)c(mean(x, na.rm=T), quantile(x, c(.025, .975), na.rm=T))
x2test <- function(L, rhs, b, V){
    value.hyp <- L %*% b - rhs
    vcov.hyp <- L %*% V %*% t(L)
    SSH <- as.vector(t(value.hyp) %*% solve(vcov.hyp) %*% value.hyp)
    q <- NROW(L)
    p <- pchisq(SSH, q, lower.tail = FALSE)
    return(p)
}
```

The object `cert_data` contains the variables required to estimate the cert-worthiness model. 
 
Variable | Description
|----|---------|
docket | Docket number
casecert | Whether or not the case was granted cert
lctdir | Direction of lower court ruling
lowercourtdissent | Whether or not dissent existed in the lower court
abortion | Whether or not the case concerned abortion
deathpenalty | Whether or not the case was a death penalty case
allegationconflict | Whether or not a conflict among lower courts was alleged
constitutionalclaim | Whether or not the case made a contitutional claim
lowercourtreversal | Whether or not the Circuit Court reversed the decision of the District Court
uspetitioner | Whether the US government was a petitioner on the case
trueconflict | Whether or not a true conflict among lower courts existed 
amicusfavor | Whether there was at least one amicus brief filed in favor of cert
amicusagainst | Whether there was at least one amicus brief filed against cert



Table 1 in the Appendix gives the proportion of `ones' in each of the dichotomous variables below that were granted and not granted cert.  

```{r}
x <- as.matrix(cert_data[,c("casecert", "lctdir", "lowercourtdissent", "abortion", "deathpenalty", "allegationconflict", "constitutionalclaim", "lowercourtreversal", "uspetitioner", "trueconflict", "amicusfavor", "amicusagainst")])
x <- sapply(1:ncol(x), function(z)as.numeric(x[,z]))
cc <- complete.cases(x)
y <- x[which(cc),1]
x <- x[which(cc),-1]
tab <- round(t(do.call(rbind, by(x, list(y), colMeans))), 2)
ntab <- c("Lower Court Direction", "Lower Court Dissent", "Abortion", "Death Penalty", "Alleged Conflict", "Constitutional Claim", 
   "Lower Court Reversal", "US Petitioner", "True Conflict", "Amicus Favor Cert", "Amicus Against Cert")
rownames(tab) <- ntab
round(tab, 2)
```

Next, we can pull the relevant independent variables, ensure they are all numeric and then listwise delete. 

```{r}
x <- as.matrix(cert_data[,c("casecert", "lctdir", "lowercourtdissent", "abortion", "deathpenalty", "allegationconflict", "constitutionalclaim", "lowercourtreversal", "uspetitioner", "trueconflict", "amicusfavor", "amicusagainst")])
x <- sapply(1:ncol(x), function(z)as.numeric(x[,z]))
cc <- complete.cases(x)
y <- x[which(cc),1]
x <- x[which(cc),-1]
```

Next, we generate the Bayesian logistic regression probabilities of cert worthiness (assuming flat priors).  

```{r}
# estimate model 
pmod <- glm(y ~ x, family=binomial)
# generate the posterior draws of coefficients from the logistic regression model 
set.seed(1256)
b.parm <- mvrnorm(1500, coef(pmod), vcov(pmod))
# generate posterior draws of cert-worthiness
parm.post <- plogis(model.matrix(pmod) %*% t(b.parm))
# transpose matrix of posteriors to match output from BART
parm.post <- t(parm.post)
```

Now, we can do the same thing for the BART model. 

```{r}
# Estimate BART
set.seed(1120477)
out <- bart(x, y, ndpost=1500)
# Calculate posterior probabilities from fitted values
bart.post <- pnorm(out$yhat.train)
```

In the article, we suggest that there are no significant differences between the BART and logistic models.  We first showed this in the appendix with the overlap coefficient: 

```{r, fig.show='hide'}
# initialize the object to hold the overlap coefficients
olaps <- rep(NA, ncol(bart.post))
# calculate the overlap for each column in the matrices of posteriors
for(i in 1:length(olaps)){
    olaps[i] <- overlap(parm.post[,i], bart.post[,i], edge.of.parameter.space=T)
}
# identify the outlying cases
o <- which(colMeans(parm.post) < .55 & colMeans(bart.post) > .65)
# summarize all overlaps
summary(olaps)
# summarize overlaps for the non-outlying cases
summary(olaps[-o])
# summarize overlaps for only outlying cases. 
summary(olaps[o])
```

We also showed it by calculating the difference between the two matrices of posterior draws. 

```{r}
# calculate difference between two matrices of probabilities
diffs <- parm.post - bart.post
# for each column, figure out what proportion of those differences are bigger than 0
pv <- apply(diffs, 2, function(x)mean(x > 0))
## Note no p-values outside the range (0.05, 0.95)
summary(pv)
```

Note that these are summary statistics of the Bayesian p-values of difference for each observation's BART and Logit probabilities of cert-worthiness.  Since the minimum value is `r round(min(pv), 3)` and the maximum value is `r round(max(pv),3)`, none of them fall outside of the range (0.05, 0.95), which would indicate a significant difference.  There are three outliers, but even those values have wide enough posterior distributions to not indicate a statistically reliable departure.   With this, we can generate Figure A1 in the appendix. 

```{r, fig.height=8, fig.width=8, out.width=".65\\textwidth", out.height=".65\\textwidth", fig.align="center"}
## Figure A1
tiff("figureA1.tiff", width=8, height=8, units="in", res=300)
plot(colMeans(parm.post), colMeans(bart.post), xlab="Logistic Regression Posterior Mean Probabilities", ylab="BART Posterior Mean Probabilities")
ol <- which(colMeans(parm.post) < .55 & colMeans(bart.post) > .65)
ol0 <- ol[-8]
ol1 <- ol[8]
text(colMeans(parm.post)[ol0], colMeans(bart.post)[ol0], cert_data$docket[ol0], 
    pos=c(4, 2, 2, 2, 2, 2, 2,  2, 2, 2, 4))
text(.556, .903, cert_data$docket[ol1], 
    pos=1)
dev.off()
```

Now, we can create a data set of cert probabilities that we can add to existing data to use in later models. 

```{r}
cert_probs <- array(dim=c(nrow(cert_data), 1500))
cert_probs[which(cc), ] <- t(parm.post)
cert_probs <- as.data.frame(cert_probs)
cert_probs$docket <- cert_data$docket
```

We can then merge the cert probabilities with our model data and remove missing cases. 

```{r}
cert_post <-  left_join(data.frame(docket=moddat$docket), cert_probs)
cert_post <- cert_post[,-1]
moddat$certworthy1 <- rowMeans(cert_post)
```


## Data and Methods 

The first thing we need to do is calculat ethe $\tau$ terms for the rare events logistic regression models.  We know that the overall discuss rate is 0.03 and that only roughly 20 percent of cases are discussed, leading to an adjusted population probability of cert of $\frac{0.03}{0.2} = 0.15$.  We start by pulling each recommender's recommendation and the true case cert status.  We put all these variables in a data frame called `temp`.  

```{r}
bv <- moddat$blackmunvoteoncert2
mc <- moddat$mcrecommendation
bc <- moddat$blackrecommendation
bv <- ifelse(is.na(bv), 0, bv)
mc <- ifelse(is.na(mc), 0, mc)
bc <- ifelse(is.na(bc), 0, bc)
temp <- data.frame(bv=bv, bc=bc, mc=mc, casecert=moddat$casecert)
```

Next, for the memo clerk, we calculate the $\tau$ term as follows (using the 85/15 grant rate among the discussed cases): 

```{r}
prop.pop <- c(.85, .15)
t.mc.mod <- glm(mc ~ as.factor(casecert)-1, data=temp, family=binomial)
set.seed(64502)
p.mc <- plogis(mvrnorm(1500, t.mc.mod$coef, vcov(t.mc.mod)))
t.mc <- p.mc %*% prop.pop
meanSD(t.mc)
```

What happens above is we estimate a GLM of the memo clerk's recommendation on case-cert.  This allows us to get sampling variability parameters for the logit of the two probabilities.  We then use those estimates to get the posterior distribution of the $\tau$ terms for the memo clerk.  We follow the same procedure for Blackmun's clerk and Blackmun himself. 

```{r}
# Blackmun's Clerk
t.bc.mod <- glm(bc ~ as.factor(casecert)-1, data=temp, family=binomial)
set.seed(1205604)
p.bc <- plogis(mvrnorm(1500, t.bc.mod$coef, vcov(t.bc.mod)))
t.bc <- p.bc %*% prop.pop
meanSD(t.bc)

# Blackmun
t.bv.mod <- glm(bv ~ as.factor(casecert)-1, data=temp, family=binomial)
set.seed(1105042)
p.bv <- plogis(mvrnorm(1500, t.bv.mod$coef, vcov(t.bv.mod)))
t.bv <- p.bv %*% prop.pop
meanSD(t.bv)
```

We then put all of the $\tau$ estimates in the same data frame. 

```{r}
tau <- data.frame(mc = t.mc, bc=t.bc, bv=t.bv)
```

The final step before estimating the models is to calculate Blackmun's distance to the memo clerk's justice.  We first gathered Blackmun's Martin-Quinn scores for the time-period in question: 

```{r}
black.ideo <- c( -.7661515 , -.9495762 , -.9878716 , -1.001301 , -.8643821 , -1.15319 , -1.383177 , -1.545243 , -1.802584 )
```

Next, we use the docket to identify the year and then subtract 85 from it, to generate integer values starting at 1 that we will use to attach the appropriate Blackmun ideology score. 

```{r}
year <- as.numeric(gsub("(^\\d{2}).*", "\\1", moddat$docket))-85
moddat$black.ideo <- black.ideo[year]
```

Finally, we can calcualte the absolute distance between the two actors: 

```{r}
moddat$dist_black_mc <- abs(moddat$black.ideo-moddat$mqtermprior)
```

### Pool Memo Writer's Recommendation.

We model the pool memo writer's recommendation with a rare-events logit model.  In particular, we loop throught the posterior values of both $\tau$ and cert-worthiness.  For each draw from the posterior of those two terms, we estimate an RE-logit model and take one draw from $N\left(\hat{b}, V(\hat{b})\right)$.  We save that draw in the object `b1`.  We then simply accumulate those draws as we loop through the posterior draws of $\tau$ and cert-worthiness. 

```{r}
w <- which(complete.cases(moddat) & complete.cases(cert_post))
cert_post <- cert_post[w,] 
moddat <- moddat[w,] 

b1 <- array(dim=c(1500, 4))
set.seed(12294292)
for(i in 1:1500){
    m1a <- relogit(mcrecommendation ~ lctdir + cert_post[,i] + mcdist, data=moddat, tau=tau$mc[i])
    b1[i, ] <- mvrnorm(1, coef(m1a), vcov(m1a))
}
```

We can then summarise the posterior distribution of the coefficients from this model.  The results below are the same as the ones in the first column of Table 1 in the article. 

```{r}
h1 <- apply(b1, 2, coda:::HPDinterval.mcmc)
coef1 <- apply(b1, 2, mean)
p1 <- apply(b1, 2, function(x)mean(x > 0))
ph1 <- paste("(", apply(round(h1, 2), 2, paste, collapse=", "), ")", sep="")
pb1 <- round(coef1, 2)
names(pb1) <- c("Intercept", "Lower Court Direction", "Cert Worthiness", "Distance to Court Median")
shuffle(pb1, ph1)
coefs[[1]] <- shuffle(pb1, ph1)
```

Next, we can calculate model fit for the memo clerk's recommendation.  We use the proportional reduction in error (PRE).  To calcualte the PRE, we use the posterior mean values of all variable parameters in the model (coefficients, cert-worthiness and $\tau$).  To do this, we use the posterior mean of the coefficients `b1` and re-estimate the model using the posterior means of cert-worthiness and and $\tau$ in order to get the appropriate design matrix.  

```{r}
b1m <- colMeans(b1)
m1a <- relogit(mcrecommendation ~ lctdir + plogis(certworthy1) + mcdist, data=moddat, tau=mean(tau$mc))
```
Next, we can calculate the probability of being granted cert. 

```{r}
p1m <- plogis(model.matrix(m1a) %*% b1m)
```

The next step is to find the cutoff in probabilities maximizes classification accuracy.  We accomplish this using a grid search over the range 0.01 and 0.99.  Because of the rare events logit mdoel, the usual value of 0.5 is not an appropriate value for classification. 

```{r}
res <- NULL
gs <- seq(0.01, 0.99, length=1000)
for(x in gs){
    res <- c(res, mean((p1m > x) ==  model.response(model.frame(m1a))))
}
```
What we find is that the optimal probability for classifying zeros and ones is `r round(gs[which.max(res)], 3)`.  Then, we can calculate the PRE as below. 

```{r}
y1 <- model.response(model.frame(m1a))
my1 <- mean(y1)
pm1 <- max(my1, 1-my1)
pre1 <- (max(res) - pm1)/(1-pm1)
```

The PRE is then, `r round(pre1, 3)` 


To calculate effects of each variable, we use the following basic formula. 

  1. Create two copies of the design matrix from our model $X^{(1)}$ and $X^{(2)}$. For variable $j$, we set all values of $X_{j}^{(1)}$ to the minimum value of $x_{j}$.  Then, we set $X_{j}^{(2)}$ to the maximum value of $x_{j}$.   
  2. Using our (# draws x k) matrix $B$ that captures the postrior draws from the model's coefficients, we calculate the posterior predicted probabilities of recommending cert as $p^{(1)} = X^{(1)}B^{\prime}$ and $p^{(2)} = X^{(2)}B^{\prime}$. 
  3. We then take the difference of the two predicted probability matrices: $\Delta p = p^{(2)} - p^{(1)}$, which is (N x # draws).
  4. We then take the mean in each column of $\Delta p$, giving us the posterior distribution of average effects.  
  5. Finally, we take the mean of the values from step 4, giving us $\overline{\Delta p}$ - the average of the average first differences.   

We first do this with the Lower Court Direction variable.  This is the variable is the second column in the design matrix. Since lower court direction is a dummy variable (0=conservative, 1=liberal), we use these two values in our analysis. First, we create two copies of the model matrix and replace the second column of the first copy with zero and the second column of the second copy with 1. 

```{r}
m1a <- relogit(mcrecommendation ~ lctdir + certworthy1 + mcdist, data=moddat, tau=tau$mc[i])
X1 <- X2 <- model.matrix(m1a)
X1[,2] <- 0
X2[,2] <- 1
```

We can then create the relevant posterior distribution first differences for each observation, which we save in the object `mes`. 

```{r}
mes <- plogis(X2 %*% t(b1)) - plogis(X1 %*% t(b1))
bds <- coda:::HPDinterval.mcmc(colMeans(mes))
bds <- paste("(", round(bds[1], 3), ", ", round(bds[2], 3), ")", sep="")
```

The effect of the variable is `r round(mean(colMeans(mes)), 2)` and the 95% HPD for this effect is `r noquote(bds)`.  Note that the credible interval covers zero, so the effect is not statistically reliable.  

Next, we can do the same for cert-worthiness. Since we use the same steps as above, we omit the commentary between steps below. 

```{r}
X1 <- X2 <- model.matrix(m1a)
X1[,3] <- 0
X2[,3] <- 1
mes2 <- plogis(X2 %*% t(b1)) - plogis(X1 %*% t(b1))
bds2 <- coda:::HPDinterval.mcmc(colMeans(mes2))
bds2 <- paste("(", round(bds2[1], 3), ", ", round(bds2[2], 3), ")", sep="")
```

The effect of the variable is `r round(mean(colMeans(mes2)), 2)` and the 95% HPD for this effect is `r noquote(bds2)`.  Note that the credible interval is bounded well away from zero, so the effect is statistically reliable.  

Finally, we can do the same for distance to court median. 

```{r}
X1 <- X2 <- model.matrix(m1a)
X1[,4] <- 0
X2[,4] <- 5
mes3 <- plogis(X2 %*% t(b1)) - plogis(X1 %*% t(b1))
bds3 <- coda:::HPDinterval.mcmc(colMeans(mes3))
bds3 <- paste("(", round(bds3[1], 3), ", ", round(bds3[2], 3), ")", sep="")
```

The effect of the variable is `r round(mean(colMeans(mes3)), 2)` and the 95% HPD for this effect is `r noquote(bds3)`.  Note that the credible interval is bounded well away from zero, so the effect is statistically reliable.  

In sum, of the three variables in the model, cert-worthiness is the only one that has any statistically, or substantively, meaningful impact. 


### Blackmun's Clerk's Recommendation.

We can do all of the same steps with Blackmun's Clerk's recommendation.  First, we estimate the relevant model. 

```{r}
b2 <- array(dim=c(1500, 7))
set.seed(692042)
    for(i in 1:1500){
    m2a <- relogit(blackrecommendation ~ blackmundistance + mcrecommendation*dist_black_mc + cert_post[,i] + lctdir, data=moddat, tau=tau$bc[i])
    b2[i, ] <- mvrnorm(1, coef(m2a), vcov(m2a))
}
```

Below, we summarize the model.  This reproduces the model in the second column of Table 1. 

```{r}
h2 <- apply(b2, 2, coda:::HPDinterval.mcmc)
coef2 <- apply(b2, 2, mean)
p1 <- apply(b2, 2, function(x)mean(x > 0))
ph2 <- paste("(", apply(round(h2, 2), 2, paste, collapse=", "), ")", sep="")
pb2 <- round(coef2, 2)
names(pb2) <- c("Intercept", "Blackmun Distance to Median", "Memo Clerk Recommendation", "Blackmun Distance to Memo Clerk", "Cert-worthiness", "Lower Court Direction", "Memo Clerk Recommendation x Blackmun Distance to Memo Clerk")
shuffle(pb2, ph2)
coefs[[2]] <- shuffle(pb2, ph2)
```

Next, we can calculate the PRE in the same way that we did above: 

```{r}
b2m <- colMeans(b2)
m2a <- relogit(blackrecommendation ~ blackmundistance + mcrecommendation*dist_black_mc + plogis(certworthy1) + lctdir, data=moddat, tau=mean(c(.072,.132)))
p2m <- plogis(model.matrix(m2a) %*% b2m)
res <- NULL
for(x in seq(0.01, 0.99, length=1000)){
    res <- c(res, mean((p2m > x) ==  model.response(model.frame(m2a))))
}
y2 <- model.response(model.frame(m2a))
my2 <- mean(y2)
pm2 <- max(my2, 1-my2)
pre2 <- (max(res) - pm2)/(1-pm2)
```

The PRE is then, `r round(pre2, 3)`.  Moving on to the effects, we can calculate the effect of the lower court decision as: 

```{r}
m2a <- relogit(blackrecommendation ~ blackmundistance + mcrecommendation*dist_black_mc + certworthy1 + lctdir, data=moddat, tau=mean(tau$bc))
X2ma <- X2mb <- model.matrix(m2a)
X2ma[,6] <- 0
X2mb[,6] <- 1

mes <- plogis(X2mb %*% t(b2)) - plogis(X2ma %*% t(b2))
bds <- coda:::HPDinterval.mcmc(colMeans(mes))
bds <- paste("(", round(bds[1], 3), ", ", round(bds[2], 3), ")", sep="")
```

The effect of the variable is `r round(mean(colMeans(mes)), 2)` and the 95% HPD for this effect is `r noquote(bds)`.  

Next, we move on to a discussion of the memo clerk's recommendation and cert-worthiness, which are interacted together.  We then make Figure 1 from the article. 

```{r, fig.height=8, fig.width=8, out.width="\\textwidth", out.height="\\textwidth", fig.align="center"}
# Set evaluation points for cert-worthiness
c2 <- seq(0, 1, length=4)
# generate hypothetical data for memo-clerk recommendation, distance of blackmun to memo clerk and cert-worthiness
eg <- expand.grid(mc = c(0,1), dist_black_mc =seq(.05, 4.55, length=10), certworthy1=c2)
# loop over the rows in the hypothetical data
res1 <- NULL
for(i in 1:nrow(eg)){
    #  make a copy of the data
    # set the variables of interest (mc recommendation, cert-worthiness and distance of Blackmun to mc
    # in our copy of the data to the values in the fake data for each row in turn
    tmp <- moddat
    tmp$mcrecommendation <- eg[i,1]
    tmp$dist_black_mc <- eg[i,2]
    tmp$certworthy1 <- eg[i,3]
    # make the design matrix for the new hypothetical data
    X1 <- model.matrix(formula(m2a), data=tmp)
    # replace cert-worthiness with the correct value
    X1[,5] <- eg[i,3]
    # calculate the posterior distribution of the predicted probabilities
    pp1 <- plogis(X1 %*% t(b2))
    # find the mean and confidence intervals for the predicted probabilities
    q1 <- mci(colMeans(pp1))
    # collect results
    res1 <- rbind(res1, q1)
}
# make results into a data frame with the hypothetical data, too.
colnames(res1) <- c("mean", "lower", "upper")
rownames(eg) <- rownames(res1) <- NULL
plot.dat <- cbind(eg, res1)
# change the values of cert-worthy in our plot data to a factor
plot.dat$certworthy1 <- factor(plot.dat$certworthy1, levels=sort(unique(plot.dat$certworthy1)), labels=paste("Pr(Cert=", round(sort(unique(plot.dat$certworthy1)), 3), ")", sep=""))
# change memo clerk's recommendation into a factor
plot.dat$mc <- factor(plot.dat$mc, labels=c("No", "Yes"))
# make the figure
# tiff("figure1.tiff", width=8, height=8, units="in", res=300)
mp <- ggplot(plot.dat, aes(x=dist_black_mc, y=mean)) + theme_bw()
mp + geom_ribbon(aes(ymin=lower, ymax=upper, fill=mc), alpha=.25) + scale_fill_manual("Memo Clerk\nRecommenation",values=c("gray25", "gray75")) + facet_wrap(~certworthy1) +
    geom_line(aes(x=dist_black_mc, y=mean, color=mc), show.legend=F) + scale_colour_manual(values=c("black", "black")) +
    guides(fill=guide_legend(title="Memo Clerk\nRecommendation"), title.position="top") + theme(legend.position="top") +
    labs(x = "Ideological Distance to Blackmun", y="Pr(Blackmun's Clerk Recommends Cert)")
# dev.off()
```

Finally, we can consider the indpeendent effect of cert-worthiness.

```{r}
X2ma <- X2mb <- model.matrix(m2a)
X2ma[,5] <- 0
X2mb[,5] <- 1

mes <- plogis(X2mb %*% t(b2)) - plogis(X2ma %*% t(b2))
bds <- coda:::HPDinterval.mcmc(colMeans(mes))
bds <- paste("(", round(bds[1], 3), ", ", round(bds[2], 3), ")", sep="")
```

The effect of the variable is `r round(mean(colMeans(mes)), 2)` and the 95% HPD for this effect is `r noquote(bds)`.  

## Blackmun's Vote on Cert

Blackmun's vote on cert is the ultimate outcome of interest.  We indicate in the article that we test a multiplicative specification against an additive one and find no statistical difference.  We first offer a couple of different frequentist options for testing these models using just the posterior means of $\hat{c}$ and $\tau$.  First, is the likelihood-ratio test. 

```{r}
m3a <- relogit(blackmunvoteoncert2 ~ blackmundistance +  blackrecommendation*mcrecommendation*dist_black_mc + plogis(certworthy1) + lctdir,
    data=moddat, tau=mean(tau$bv))
m3b <- relogit(blackmunvoteoncert2 ~ blackmundistance + blackrecommendation +mcrecommendation*dist_black_mc +  plogis(certworthy1) + lctdir, data=moddat, tau=mean(tau$bv))
lrtest(m3a, m3b)
```

Next is the Wald Test: 

```{r}
n <- colnames(model.matrix(m3a))
linearHypothesis(m3a, paste(n[c(8:11)], "=0", sep=""))
```

The slightly better test that we suggest can be calculated below.  First, we can estimate the multiplicative model as we did above: 

```{r}
b3 <- array(dim=c(1500, 11))
set.seed(1561)
for(i in 1:1500){
    m3a <- relogit(blackmunvoteoncert2 ~ blackmundistance + blackrecommendation*mcrecommendation*dist_black_mc +
        cert_post[,i] + lctdir, data=moddat, tau=tau$bv[i])
    b3[i, ] <- mvrnorm(1, coef(m3a), vcov(m3a))
}
```

Next, we can specify the appropriate elements of the $\chi^{2}$ test for linear hypotheses.  The function to perform this test is defined in the first code chunk above. 

```{r}
L <- matrix(0, ncol=ncol(b3), nrow=4)
L[cbind(1:4, 8:11)] <- 1
rhs <- rep(0, 4)
x2test(L, rhs, colMeans(b3), var(b3))
```

Since the p-value is bigger than 0.05, we cannot reject the null hypothesis that the two models are not different from each other.  Thus, we re-estimate the model without the interaction terms.   

A more clearly Baysian test woudld have us calculate the log likelihoods for each model, which we can do below. 

```{r}
b3a <- array(dim=c(1500, ncol(model.matrix(m3a))))
b3b <- array(dim=c(1500, ncol(model.matrix(m3b))))
p3a <- p3b <- array(dim=c(1500, nrow(moddat)))
deva <- devb <- rep(NA, 1500)
set.seed(6603)
for(i in 1:1500){
    m3a <- relogit(blackmunvoteoncert2 ~ blackmundistance + blackrecommendation*mcrecommendation*dist_black_mc +
        cert_post[,i] + lctdir, data=moddat, tau=tau$bv[i])
    m3b <- relogit(blackmunvoteoncert2 ~ blackmundistance + blackrecommendation + mcrecommendation*dist_black_mc +
        cert_post[,i] + lctdir, data=moddat, tau=tau$bv[i])
b3a[i,] <- mvrnorm(1, coef(m3a), vcov(m3a))
b3b[i,] <- mvrnorm(1, coef(m3b), vcov(m3b))
p3a[i,] <- predict(m3a, type="response")
p3b[i,] <- predict(m3b, type="response")
deva[i] <- deviance(m3a)
devb[i] <- deviance(m3b)
}
y <- model.response(model.frame(m3a))
illa <- apply(p3a, 1, function(x)ifelse(y==1, log(x), log(1-x)))
illb <- apply(p3b, 1, function(x)ifelse(y==1, log(x), log(1-x)))
```

Then, we calculate the $\chi^2$ for each iteration of the markov chain and then find how much of that distribution lies below the theoretical cutoff for the appropriate $\chi^2$ distribution under the null. 

```{r}
mean(2*(colSums(illa) - colSums(illb)) <  qchisq(0.95, 4))
```

Since all of the distribution falls beneath the cutoff, we are confident that the two models are not significantly different.  

Finally, we can appeal to the logic of  Clarke test.  To this end, we calculate the difference in the individual log-likelihoods across the two models.  We can then calculate the proportion of times that the larger model log-likelihood is bigger.  

```{r}
illD <- illa > illb
summary(colMeans(illD))
mean(colMeans(illD) < .46)
```

Across the 1500 simulations, at most, slightly less than 50% of the observations had higher log-likelihoods for the interactive model.  This is certainly not evidence that 

Now that we have settled on a model, we can use the same methods as above to interrogate it.  

```{r}
hb3 <- apply(b3b, 2, coda:::HPDinterval.mcmc)
coef3 <- apply(b3b, 2, mean)
p1 <- apply(b3b, 2, function(x)mean(x > 0))
phb3 <- paste("(", apply(round(hb3, 2), 2, paste, collapse=", "), ")", sep="")
pb3b <- round(coef3, 2)
names(pb3b) <- c("Intercept", "Blackmun Distance to Median", "Blackmun's Clerk's Recommendation", "Memo Clerk Recommendation", "Blackmun Distance to Memo Clerk", "Cert-worthiness", "Lower Court Direction", "Memo Clerk Recommendation x Blackmun Distance to Memo Clerk")
shuffle(pb3b, phb3)
coefs[[3]] <- shuffle(pb3b, phb3)
```

Next, we can calculate the PRE in the same way that we did above: 

```{r}
b3bm <- colMeans(b3b)
m3a <- relogit(blackmunvoteoncert2 ~ blackmundistance + blackrecommendation + mcrecommendation*dist_black_mc +
        certworthy1 + lctdir, data=moddat, tau=mean(tau$bv))
p3m <- plogis(model.matrix(m3a) %*% b3bm)
res <- NULL
for(x in seq(0.01, 0.99, length=1000)){
    res <- c(res, mean((p3m > x) ==  model.response(model.frame(m3a))))
}
y3 <- model.response(model.frame(m3a))
my3 <- mean(y3)
pm3 <- max(my3, 1-my3)
pre3 <- (max(res) - pm3)/(1-pm3)
```

The PRE is then, `r round(pre3, 3)`.  Now, we can move on to calculating the effects. 

```{r}
X3ma <- X3mb <- model.matrix(m3a)
X3ma[,6] <- 0
X3mb[,6] <- 1

mes <- plogis(X3mb %*% t(b3b)) - plogis(X3ma %*% t(b3b))
bds <- coda:::HPDinterval.mcmc(colMeans(mes))
bds <- paste("(", round(bds[1], 3), ", ", round(bds[2], 3), ")", sep="")
```

The effect of cert-worthiness is `r round(mean(colMeans(mes)), 2)` and the 95% HPD for this effect is `r noquote(bds)`.  

```{r}
X3ma <- X3mb <- model.matrix(m3a)
X3ma[,3] <- 0
X3mb[,3] <- 1

mes <- plogis(X3mb %*% t(b3b)) - plogis(X3ma %*% t(b3b))
bds <- coda:::HPDinterval.mcmc(colMeans(mes))
bds <- paste("(", round(bds[1], 3), ", ", round(bds[2], 3), ")", sep="")
```

The effect of Blackmun's clerk's recommendation is `r round(mean(colMeans(mes)), 2)` and the 95% HPD for this effect is `r noquote(bds)`.  

## Robustness Check - Using BART Posteriors for Cert-worthiness. 

One additional robustness check for our results is to use the competing BART posteriors as our measure of cert-worthiness.  To do this, we simply replace the `cert_post` object with the BART posteriors: 

```{r}
cert_probs <- array(dim=c(nrow(cert_data), 1500))
cert_probs[which(cc), ] <- t(bart.post)
cert_probs <- as.data.frame(cert_probs)
cert_probs$docket <- cert_data$docket
```

We can then merge the cert probabilities with our model data and remove missing cases. 

```{r}
cert_post <-  left_join(data.frame(docket=moddat$docket), cert_probs)
cert_post <- cert_post[,-1]
moddat$certworthy1 <- rowMeans(cert_post)
```

In general, we follow the same modeling strategy as above, so we do so generally below without the intervening commentary. 

### Pool Memo Writer's Recommendation.

```{r}
w <- which(complete.cases(moddat) & complete.cases(cert_post))
cert_post <- cert_post[w,] 
moddat <- moddat[w,] 
b1 <- array(dim=c(1500, 4))
set.seed(12294292)
for(i in 1:1500){
    m1a <- relogit(mcrecommendation ~ lctdir + cert_post[,i] + mcdist, data=moddat, tau=tau$mc[i])
    b1[i, ] <- mvrnorm(1, coef(m1a), vcov(m1a))
}

h1 <- apply(b1, 2, coda:::HPDinterval.mcmc)
coef1 <- apply(b1, 2, mean)
p1 <- apply(b1, 2, function(x)mean(x > 0))
ph1 <- paste("(", apply(round(h1, 2), 2, paste, collapse=", "), ")", sep="")
pb1 <- round(coef1, 2)
names(pb1) <- c("Intercept", "Lower Court Direction", "Cert Worthiness", "Distance to Court Median")
coefs[[1]] <- cbind(coefs[[1]], shuffle(pb1, ph1))
```

### Blackmun's clerk
```{r}
b2 <- array(dim=c(1500, 7))
set.seed(692042)
    for(i in 1:1500){
    m2a <- relogit(blackrecommendation ~ blackmundistance + mcrecommendation*dist_black_mc + cert_post[,i] + lctdir, data=moddat, tau=tau$bc[i])
    b2[i, ] <- mvrnorm(1, coef(m2a), vcov(m2a))
}

h2 <- apply(b2, 2, coda:::HPDinterval.mcmc)
coef2 <- apply(b2, 2, mean)
p1 <- apply(b2, 2, function(x)mean(x > 0))
ph2 <- paste("(", apply(round(h2, 2), 2, paste, collapse=", "), ")", sep="")
pb2 <- round(coef2, 2)
names(pb2) <- c("Intercept", "Blackmun Distance to Median", "Memo Clerk Recommendation", "Blackmun Distance to Memo Clerk", "Cert-worthiness", "Lower Court Direction", "Memo Clerk Recommendation x Blackmun Distance to Memo Clerk")
coefs[[2]] <- cbind(coefs[[2]], shuffle(pb2, ph2))
```

### Blackmun's vote

```{r}
b3b <- array(dim=c(1500, ncol(model.matrix(m3b))))
set.seed(6603)
for(i in 1:1500){
   m3b <- relogit(blackmunvoteoncert2 ~ blackmundistance + blackrecommendation + mcrecommendation*dist_black_mc +
        cert_post[,i] + lctdir, data=moddat, tau=tau$bv[i])
   b3b[i,] <- mvrnorm(1, coef(m3b), vcov(m3b))
}

hb3 <- apply(b3b, 2, coda:::HPDinterval.mcmc)
coef3 <- apply(b3b, 2, mean)
p1 <- apply(b3b, 2, function(x)mean(x > 0))
phb3 <- paste("(", apply(round(hb3, 2), 2, paste, collapse=", "), ")", sep="")
pb3b <- round(coef3, 2)
names(pb3b) <- c("Intercept", "Blackmun Distance to Median", "Blackmun's Clerk's Recommendation", "Memo Clerk Recommendation", "Blackmun Distance to Memo Clerk", "Cert-worthiness", "Lower Court Direction", "Memo Clerk Recommendation x Blackmun Distance to Memo Clerk")
coefs[[3]] <- cbind(coefs[[3]], shuffle(pb3b, phb3))
```

```{r}
coefs[[1]]

coefs[[2]]

coefs[[3]]
```




